Loading data

I’m going to look at the NYC restaurant inspection data, specifically Asian restaurants in Manhattan.

library(tidyverse)
library(p8105.datasets)
library(plotly)
# Load the inspection records, keep only the columns used by the plots below,
# and drop rows whose borough is recorded as "0" or whose grade is not A, B or C.
nyc_inspect <- 
  read_csv(
    "data/nyc_inspec.csv",
    # Phone numbers are identifiers, not quantities: reading them as text
    # avoids the parsing failures triggered by non-numeric entries such as "_".
    # All other columns are still guessed by readr.
    col_types = cols(phone = col_character())
  ) %>% 
  select(
    dba, boro, cuisine_description, violation_code, violation_description,
    score, grade, latitude, longitude
  ) %>% 
  filter(boro != "0", grade %in% c("A", "B", "C"))
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   camis = col_double(),
##   zipcode = col_double(),
##   phone = col_double(),
##   inspection_date = col_datetime(format = ""),
##   score = col_double(),
##   record_date = col_datetime(format = ""),
##   latitude = col_double(),
##   longitude = col_double(),
##   community_board = col_double(),
##   bin = col_double(),
##   bbl = col_double(),
##   grade_date = col_datetime(format = "")
## )
## See spec(...) for full column specifications.
## Warning: 555 parsing failures.
##   row   col               expected actual                  file
## 31017 phone no trailing characters      _ 'data/nyc_inspec.csv'
## 31251 phone no trailing characters      _ 'data/nyc_inspec.csv'
## 31266 phone no trailing characters      _ 'data/nyc_inspec.csv'
## 31477 phone no trailing characters      _ 'data/nyc_inspec.csv'
## 32470 phone no trailing characters      _ 'data/nyc_inspec.csv'
## ..... ..... ...................... ...... .....................
## See problems(...) for more details.

Plotly plots

Scatterplot

# Map-style scatterplot of Manhattan records with an inspection score of 20 or
# more, coloured by score, with a hover label for each point.
nyc_inspect %>% 
  # `score >= 20` already discards rows with a missing score
  filter(boro == "Manhattan", score >= 20) %>% 
  # Rows without coordinates cannot be placed on the plot
  drop_na(latitude, longitude) %>% 
  mutate(
    text_label = str_c(
      "Restaurant: ", dba, "\nCuisine: ", cuisine_description, "\nScore: ", score
    )
  ) %>% 
  plot_ly(
    # Longitude is the horizontal (east-west) coordinate and latitude the
    # vertical (north-south) one, so the points keep their geographic layout
    x = ~longitude, y = ~latitude, color = ~score, text = ~text_label, 
    alpha = .5, type = "scatter", mode = "markers") %>% 
  layout(title = "Restaurants in Manhattan with inspection score greater than or equal to 20")
## Warning: Ignoring 9 observations

Boxplot

# Boxplot of inspection scores per borough, with boroughs ordered by score.
nyc_inspect %>% 
  mutate(
    boro = fct_reorder(boro, score),
    text_label = str_c("Cuisine: ", cuisine_description, "\nScore: ", score)
  ) %>% 
  plot_ly(y = ~score, x = ~boro, 
          color = ~boro, colors = "viridis",
          text = ~text_label,
          type = "box") %>% 
  layout(
    title = "Scores of restaurants in NY grouped by borough", 
    # Axis settings for a 2D chart go directly in layout(); `scene` only
    # applies to 3D plots, so titles nested inside it are never shown
    xaxis = list(title = "Borough"),
    yaxis = list(title = "Inspection score")
  )

Bar plot

# Bar chart of the number of Manhattan records for a selected set of cuisines,
# with bars ordered by count.
nyc_inspect %>% 
  filter(
    boro == "Manhattan", 
    # Set membership needs %in%: `==` against a vector recycles it row by row
    # and silently drops most of the matching rows
    cuisine_description %in% c(
      "Korean", "Thai", "French", "Pizza", "Italian", "Peruvian", "Greek",
      "Chinese", "Vietnamese", "Japanese", "Bakery", "Russian", "German",
      "Indian", "Delicatessen", "Café/Coffee/Tea", "Mexican", "American"
    )
  ) %>% 
  count(cuisine_description) %>% 
  mutate(cuisine_description = fct_reorder(cuisine_description, n)) %>% 
  plot_ly(x = ~cuisine_description, y = ~n, color = ~cuisine_description,
          type = "bar", colors = "viridis") %>% 
  layout(
    title = "Count of restaurants in Manhattan by cuisine",
    # Axis settings for a 2D chart go directly in layout(); `scene` only
    # applies to 3D plots, so titles nested inside it are never shown
    xaxis = list(title = "Cuisine"),
    yaxis = list(title = "Count")
  )